# Loading required libraries.
library(tidyverse)
library(ggplot2)
library(forecast)
library(fpp2)
library(fma)
library(lmtest)
library(dbplyr)   # NOTE(review): dbplyr is the database backend; dplyr was probably intended (dplyr is already attached via tidyverse) - confirm
library(lubridate)
library(seasonal)
library(urca)
library(tseries)
library(gridExtra)
# Load the daily food sales data and build ts objects at two frequencies.
food <- read_csv('../data/calendar_factors.csv')
food.252 <- as.vector(unlist(food['sales']))
# frequency m = 365: indexed by (year, day-of-year)
foods_sale <- ts(food.252, start=c(2011,29), end = c(2016,144), frequency = 365) # 1941 days
foods_train <- window(foods_sale, start=c(2011,29), end = c(2016,116) ) # 1913 days: obs 1-1913
foods_test <- window(foods_sale, start=c(2016,117), end = c(2016,144) ) # 28 days: obs 1914-1941
# frequency m = 7: indexed by (week, weekday)
daily_ts <- ts(food.252, start=c(1,7), end = c(279,1), frequency = 7) # obs 1-1941: 2011-1-29 ~ 2016-5-22
daily_train <- subset(daily_ts, end = 1913) # training set: obs 1-1913, 2011-1-29 ~ 2016-4-25 c(1,7)
daily_test <- subset(daily_ts, start = 1914) # test set: obs 1914-1941, 2016-4-26 ~ 2016-5-22 c(275,2)
# m=365: exploratory plots of the annual-frequency training series.
# NOTE(review): per the window() calls above, the training series runs
# 2011-01-29 ~ 2016-04-25; the title dates below were corrected to match.
autoplot(foods_train) + ggtitle("Daily unit sales (01/29/2011 - 04/25/2016)") + ylab("Figure.1 Daily unit sales") + xlab("Year")
autoplot(decompose(foods_train, type='additive'))+ ggtitle("Figure.2 Additive decomposition") + xlab("Year") + ylab("Component") # additive decomposition (typo "decomposistion" fixed)
autoplot(decompose(foods_train, type='multiplicative')) + ggtitle("Multiplicative decomposition") + xlab("Year") + ylab("Component") # multiplicative decomposition
hist(foods_train, main = 'Distribution of daily unit sales', xlab='Daily unit sale')
# m=7: exploratory plots of the weekly-frequency training series.
autoplot(daily_train) + ggtitle("Unit sale time plot")
autoplot(decompose(daily_train, type='additive')) # additive decomposition
autoplot(decompose(daily_train, type='multiplicative')) # multiplicative decomposition
hist(daily_train)
autoplot(diff(daily_train, lag=7)) # lag-7 (weekly) seasonal difference
ggsubseriesplot(daily_train) + ggtitle("Unit sales during a week") # unit sale is higher on weekends
summary(foods_train)
Min. 1st Qu. Median Mean 3rd Qu. Max.
4.00 26.00 36.00 39.34 49.00 147.00
# m=365
# ACF of the raw series: slow decay with spikes every 7 lags -> non-stationary with weekly seasonality
print(Acf(foods_train)) # non-stationary, as r1 is large and positive, and the ACF drops to zero slowly
Autocorrelations of series ‘foods_train’, by lag
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1.000 0.567 0.190 0.013 0.009 0.156 0.466 0.660 0.445 0.124 -0.024 -0.025 0.130 0.435 0.625 0.427 0.117 -0.034
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
-0.047 0.090 0.403 0.599 0.410 0.104 -0.044 -0.050 0.094 0.403 0.596 0.384 0.081 -0.066 -0.080 0.073 0.386 0.569
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
0.356 0.058 -0.088 -0.100 0.043 0.348 0.544 0.356 0.050 -0.106 -0.121 0.032 0.333 0.541 0.354 0.042 -0.104 -0.108
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
0.035 0.321 0.501 0.321 0.032 -0.120 -0.121 0.019 0.312 0.478 0.298 0.024 -0.136 -0.156 -0.022 0.274 0.462 0.286
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
0.002 -0.164 -0.167 -0.024 0.272 0.458 0.277 -0.013 -0.159 -0.164 -0.043 0.246 0.427 0.250 -0.046 -0.185 -0.202 -0.057
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
0.218 0.411 0.232 -0.044 -0.191 -0.190 -0.060 0.214 0.399 0.221 -0.041 -0.183 -0.188 -0.060 0.213 0.403 0.233 -0.042
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
-0.196 -0.196 -0.082 0.191 0.379 0.211 -0.074 -0.215 -0.229 -0.096 0.181 0.376 0.200 -0.071 -0.225 -0.236 -0.103 0.194
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
0.362 0.186 -0.085 -0.220 -0.217 -0.090 0.178 0.349 0.190 -0.094 -0.231 -0.247 -0.119 0.147 0.316 0.156 -0.123 -0.268
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
-0.272 -0.139 0.135 0.316 0.135 -0.141 -0.282 -0.275 -0.147 0.127 0.316 0.137 -0.141 -0.276 -0.284 -0.137 0.131 0.300
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
0.131 -0.150 -0.299 -0.292 -0.151 0.128 0.305 0.138 -0.145 -0.280 -0.277 -0.150 0.124 0.292 0.120 -0.142 -0.283 -0.275
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
-0.142 0.123 0.295 0.109 -0.158 -0.292 -0.290 -0.149 0.121 0.283 0.113 -0.159 -0.284 -0.286 -0.143 0.117 0.291 0.104
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
-0.176 -0.304 -0.296 -0.164 0.113 0.280 0.104 -0.164 -0.286 -0.290 -0.146 0.114 0.292 0.118 -0.151 -0.282 -0.286 -0.147
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
0.129 0.292 0.112 -0.151 -0.274 -0.263 -0.113 0.147 0.311 0.129 -0.121 -0.242 -0.234 -0.093 0.174 0.336 0.165 -0.104
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
-0.214 -0.209 -0.076 0.175 0.338 0.162 -0.103 -0.229 -0.222 -0.082 0.176 0.331 0.169 -0.092 -0.224 -0.212 -0.073 0.187
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
0.342 0.173 -0.086 -0.200 -0.181 -0.044 0.201 0.350 0.194 -0.059 -0.172 -0.180 -0.055 0.195 0.355 0.190 -0.067 -0.186
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
-0.175 -0.046 0.205 0.356 0.187 -0.054 -0.199 -0.180 -0.048 0.218 0.372 0.207 -0.049 -0.170 -0.153 -0.019 0.226 0.388
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
0.224 -0.042 -0.161 -0.158 -0.017 0.239 0.389 0.232 -0.029 -0.157 -0.160 -0.027 0.238 0.396 0.236 -0.029 -0.149 -0.119
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
0.006 0.255 0.416 0.248 -0.010 -0.125 -0.111 0.015 0.274 0.436 0.254 -0.006 -0.123 -0.124 0.010 0.272 0.449 0.263
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341
0.008 -0.126 -0.124 0.020 0.287 0.443 0.268 0.018 -0.104 -0.095 0.031 0.298 0.471 0.297 0.026 -0.105 -0.089 0.031
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
0.297 0.451 0.284 0.020 -0.101 -0.092 0.033 0.287 0.442 0.280 0.022 -0.106 -0.107 0.029 0.294 0.465 0.287 0.033
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
-0.089 -0.090 0.065 0.334 0.528 0.332 0.054 -0.075 -0.086 0.042 0.304 0.469 0.292 0.023 -0.108 -0.105 0.026 0.287
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
0.435 0.272 0.009 -0.115 -0.119 0.002 0.258 0.430 0.269 0.019 -0.120 -0.119 0.009 0.259 0.423 0.251 -0.003 -0.119
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
-0.115 0.000 0.253 0.390 0.225 -0.021 -0.135 -0.136 -0.027 0.237 0.395 0.233 -0.026 -0.134 -0.143 -0.030 0.214 0.392
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
0.242 -0.020 -0.126 -0.137 -0.025 0.214 0.362 0.209 -0.030 -0.151 -0.144 -0.037 0.200 0.341 0.179 -0.043 -0.173 -0.178
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
-0.068 0.163 0.320 0.170 -0.069 -0.186 -0.187 -0.071 0.175 0.323 0.161 -0.071 -0.187 -0.189 -0.082 0.167 0.307 0.157
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
-0.082 -0.194 -0.202 -0.092 0.135 0.286 0.147 -0.070 -0.192 -0.189 -0.095 0.120 0.280 0.139 -0.091 -0.197 -0.194 -0.095
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
0.127 0.283 0.131 -0.095 -0.203 -0.198 -0.093 0.121 0.263 0.119 -0.104 -0.216 -0.229 -0.115 0.115 0.263 0.115 -0.118
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
-0.233 -0.238 -0.138 0.104 0.246 0.107 -0.119 -0.227 -0.220 -0.128 0.100 0.233 0.089 -0.133 -0.240 -0.246 -0.151 0.072
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
0.210 0.069 -0.158 -0.265 -0.277 -0.174 0.038 0.178 0.032 -0.193 -0.301 -0.291 -0.172 0.053 0.184 0.038 -0.197 -0.301
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
-0.297 -0.192 0.030 0.164 0.023 -0.194 -0.305 -0.312 -0.193 0.034 0.170 0.015 -0.198 -0.304 -0.295 -0.191 0.033 0.167
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
0.021 -0.193 -0.305 -0.300 -0.183 0.019 0.161 0.018 -0.203 -0.305 -0.297 -0.190 0.034 0.165 0.035 -0.192 -0.296 -0.288
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575
-0.170 0.032 0.170 0.024 -0.190 -0.291 -0.283 -0.174 0.034 0.165 0.035 -0.179 -0.287 -0.282 -0.169 0.035 0.175 0.044
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
-0.176 -0.283 -0.279 -0.157 0.042 0.183 0.046 -0.161 -0.257 -0.249 -0.137 0.072 0.193 0.060 -0.151 -0.239 -0.233 -0.127
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
0.083 0.204 0.078 -0.136 -0.214 -0.210 -0.110 0.095 0.215 0.091 -0.123 -0.222 -0.213 -0.114 0.089 0.214 0.085 -0.122
612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
-0.214 -0.204 -0.104 0.094 0.224 0.094 -0.111 -0.195 -0.199 -0.093 0.103 0.217 0.098 -0.093 -0.187 -0.187 -0.091 0.105
630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647
0.231 0.089 -0.101 -0.194 -0.190 -0.091 0.108 0.236 0.095 -0.096 -0.198 -0.187 -0.095 0.110 0.227 0.097 -0.098 -0.182
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
-0.175 -0.081 0.117 0.240 0.116 -0.087 -0.185 -0.176 -0.079 0.117 0.241 0.115 -0.086 -0.188 -0.171 -0.066 0.136 0.254
666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683
0.117 -0.077 -0.173 -0.156 -0.052 0.141 0.249 0.119 -0.077 -0.155 -0.149 -0.053 0.137 0.259 0.120 -0.065 -0.154 -0.140
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701
-0.038 0.154 0.272 0.144 -0.049 -0.145 -0.142 -0.037 0.148 0.268 0.142 -0.054 -0.142 -0.130 -0.026 0.165 0.290 0.165
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
-0.040 -0.131 -0.120 -0.020 0.180 0.301 0.175 -0.021 -0.113 -0.101 0.003 0.185 0.290 0.161 -0.036 -0.123 -0.103 0.003
720 721 722 723 724 725 726 727 728 729 730
0.190 0.304 0.176 -0.019 -0.101 -0.088 0.020 0.213 0.347 0.206 0.012
summary(ur.kpss(foods_train)) # the test-statistic 3.01 is much larger than the 1pct 0.74, which means that the data is not stationary
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 8 lags.
Value of test-statistic is: 3.0088
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
summary(ur.kpss(diff(foods_train))) # 0.003 is below every critical value: one first difference makes the series stationary
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 8 lags.
Value of test-statistic is: 0.003
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
kpss.test(foods_train) # p-value floored at 0.01 -> reject level stationarity
p-value smaller than printed p-value
KPSS Test for Level Stationarity
data: foods_train
KPSS Level = 3.0088, Truncation lag parameter = 8, p-value = 0.01
adf.test(foods_train) # p-value floored at 0.01 -> ADF rejects a unit root (KPSS and ADF test different nulls)
p-value smaller than printed p-value
Augmented Dickey-Fuller Test
data: foods_train
Dickey-Fuller = -5.5278, Lag order = 12, p-value = 0.01
Box.test(foods_train) # p-value is tiny, reject the null hypothesis that there's no autocorrelation.
Box-Pierce test
data: foods_train
X-squared = 614.31, df = 1, p-value < 2.2e-16
Box.test(foods_train,type='Lj') # same as Box-Pierce test
Box-Ljung test
data: foods_train
X-squared = 615.28, df = 1, p-value < 2.2e-16
# Box-Cox lambda estimates and required differencing orders for both frequencies.
BoxCox.lambda(foods_train) # 0.31
[1] 0.3054523
# m=7
BoxCox.lambda(daily_ts) # 0.204
[1] 0.2042021
BoxCox.lambda(daily_train) # 0.204
[1] 0.2039999
nsdiffs(foods_train) # 0: seasonal differencing is not required 0
[1] 0
ndiffs(foods_train) # 1: This process of using a sequence of KPSS tests to determine the appropriate number of first differences is carried out by the function ndiffs().As we saw from the KPSS tests above, one difference is required to make the sale data stationary.
[1] 1
nsdiffs(daily_train) # 1: one seasonal (lag-7) difference is suggested at m=7
[1] 1
ndiffs(diff(daily_train, lag = 7)) # 0: no further first difference needed after the seasonal difference
[1] 0
# Split the raw data frame to match the ts train/test split (rows 1-1913 / 1914-1941).
food_train_df <- food[1:1913,]
food_test_df <- food[1914:1941,]
food_train_df
# Pairwise correlations of candidate regressors; all near zero, so collinearity is not a concern.
# (The column 'cultrural_days' is misspelled in the source CSV; kept as-is to match the data.)
round(cor(food_train_df[, c(5,6,7,9,10,11,12)]),2)
wday month year sporting_days cultrural_days national_days religious_days
wday 1.00 0.00 0.00 -0.02 -0.07 -0.01 0.00
month 0.00 1.00 -0.16 -0.05 -0.06 0.02 -0.06
year 0.00 -0.16 1.00 0.00 0.00 0.00 0.01
sporting_days -0.02 -0.05 0.00 1.00 0.03 -0.02 -0.02
cultrural_days -0.07 -0.06 0.00 0.03 1.00 -0.02 0.04
national_days -0.01 0.02 0.00 -0.02 -0.02 1.00 -0.03
religious_days 0.00 -0.06 0.01 -0.02 0.04 -0.03 1.00
# Linear regression of sales on lagged sales, calendar dummies, and event-day counts.
# dplyr::lag(sales) supplies the previous day's sales; its first value is NA, so lm()
# drops one row ("1 observation deleted due to missingness" in the summary below).
# 'cultrural_days' matches the (misspelled) column name in the source data.
fit.lm1 <- lm(formula = sales ~ lag(sales) + as.factor(wday) + as.factor(month)+ as.factor(year) + sporting_days + cultrural_days + national_days + religious_days, data = food_train_df)
summary(fit.lm1)
Call:
lm(formula = sales ~ lag(sales) + as.factor(wday) + as.factor(month) +
as.factor(year) + sporting_days + cultrural_days + national_days +
religious_days, data = food_train_df)
Residuals:
Min 1Q Median 3Q Max
-30.859 -6.519 -0.958 5.966 65.517
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.27999 1.27000 19.906 < 2e-16 ***
lag(sales) 0.36092 0.02103 17.160 < 2e-16 ***
as.factor(wday)2 -6.11636 0.97134 -6.297 3.77e-10 ***
as.factor(wday)3 -30.97754 0.96674 -32.043 < 2e-16 ***
as.factor(wday)4 -24.85916 0.92741 -26.805 < 2e-16 ***
as.factor(wday)5 -23.05349 0.94646 -24.358 < 2e-16 ***
as.factor(wday)6 -21.15682 0.94176 -22.465 < 2e-16 ***
as.factor(wday)7 -12.45409 0.92970 -13.396 < 2e-16 ***
as.factor(month)2 3.53303 1.18913 2.971 0.003005 **
as.factor(month)3 5.33709 1.17050 4.560 5.45e-06 ***
as.factor(month)4 8.18739 1.20350 6.803 1.37e-11 ***
as.factor(month)5 9.24058 1.26244 7.320 3.66e-13 ***
as.factor(month)6 14.00795 1.31561 10.648 < 2e-16 ***
as.factor(month)7 15.20330 1.32126 11.507 < 2e-16 ***
as.factor(month)8 19.38193 1.37246 14.122 < 2e-16 ***
as.factor(month)9 12.45209 1.30767 9.522 < 2e-16 ***
as.factor(month)10 10.49660 1.27215 8.251 2.91e-16 ***
as.factor(month)11 9.07927 1.27090 7.144 1.29e-12 ***
as.factor(month)12 10.71883 1.27199 8.427 < 2e-16 ***
as.factor(year)2012 6.32791 0.83049 7.619 4.01e-14 ***
as.factor(year)2013 10.47356 0.87571 11.960 < 2e-16 ***
as.factor(year)2014 9.60922 0.86520 11.106 < 2e-16 ***
as.factor(year)2015 7.52883 0.84285 8.933 < 2e-16 ***
as.factor(year)2016 8.34559 1.24682 6.693 2.87e-11 ***
sporting_days -10.58217 2.73803 -3.865 0.000115 ***
cultrural_days -5.81321 1.78623 -3.254 0.001156 **
national_days 10.28531 1.56108 6.589 5.75e-11 ***
religious_days 4.24155 1.49442 2.838 0.004585 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.61 on 1884 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6795, Adjusted R-squared: 0.6749
F-statistic: 147.9 on 27 and 1884 DF, p-value: < 2.2e-16
checkresiduals(fit.lm1) # Breusch-Godfrey p = 1.6e-07 < 0.05: residuals are still serially correlated
Breusch-Godfrey test for serial correlation of order up to 31
data: Residuals
LM test = 89.077, df = 31, p-value = 1.599e-07
CV(fit.lm1) # cross-validation statistics for the linear model
CV AIC AICc BIC AdjR2
114.2756442 9059.7334945 9060.6580429 9220.8547422 0.6748823
# Observed vs fitted values from the linear model on the training period.
# NOTE(review): predict.lm(fit.lm1) has one fewer value than daily_train (the lag-NA
# row was dropped), so this fitted ts may be shifted by one day - confirm alignment.
autoplot(daily_train, series='Observations') +
  autolayer(ts(predict.lm(fit.lm1), frequency = 7), series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") +
  ggtitle("Daily unit sales (linear regression model)")
# Out-of-sample predictions over the 28-day test window, anchored at week 275, day 2.
autoplot(daily_test, series = 'Observations') +
  autolayer(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2) ), series = 'Linear regression') +
  ylab("Unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales (linear regression model)")
lambda <- BoxCox.lambda(daily_train) # ~0.204; used by later transformed models
# Time-series linear model: linear trend + day-of-week dummies.
fit.tslm1 <- tslm(daily_train ~ trend + season)
summary(fit.tslm1)
Call:
tslm(formula = daily_train ~ trend + season)
Residuals:
Min 1Q Median 3Q Max
-36.834 -9.540 -2.207 7.454 90.456
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.011e+01 1.043e+00 48.047 <2e-16 ***
trend 5.909e-03 5.976e-04 9.889 <2e-16 ***
season2 -2.365e+01 1.234e+00 -19.161 <2e-16 ***
season3 -2.708e+01 1.234e+00 -21.943 <2e-16 ***
season4 -2.617e+01 1.234e+00 -21.203 <2e-16 ***
season5 -2.425e+01 1.234e+00 -19.650 <2e-16 ***
season6 -1.481e+01 1.234e+00 -11.998 <2e-16 ***
season7 8.453e-01 1.233e+00 0.686 0.493
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.43 on 1905 degrees of freedom
Multiple R-squared: 0.4001, Adjusted R-squared: 0.3979
F-statistic: 181.5 on 7 and 1905 DF, p-value: < 2.2e-16
checkresiduals(fit.tslm1) # Breusch-Godfrey p < 2.2e-16: strong residual autocorrelation remains
Breusch-Godfrey test for serial correlation of order up to 14
data: Residuals from Linear regression model
LM test = 820.75, df = 14, p-value < 2.2e-16
CV(fit.tslm1) # 1.022361e+04
CV AIC AICc BIC AdjR2
2.091685e+02 1.022361e+04 1.022371e+04 1.027362e+04 3.979282e-01
# Zoom in on weeks 50-100 to compare fitted vs observed; the xlim message below is expected.
autoplot(daily_train, series = 'Observations') +
autolayer(fitted(fit.tslm1), series = 'fitted') +
xlim(50, 100)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
# Forecast the 28-day test window with the trend + season linear model.
# Typos fixed in axis label ("Predited") and title ("regressiom").
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.tslm1, h=28), PI = FALSE, series = 'linear regression model') +
  ylab("Predicted unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales (linear regression model)")
# Fourier-term regression: with m = 7 and K = 3, the six Fourier terms span the same
# space as the six season dummies, so the fit statistics match fit.tslm1 exactly.
fit.tslmfouriser1 = tslm(daily_train ~ trend + fourier(daily_train, K=3))
summary(fit.tslmfouriser1)
Call:
tslm(formula = daily_train ~ trend + fourier(daily_train, K = 3))
Residuals:
Min 1Q Median 3Q Max
-36.834 -9.540 -2.207 7.454 90.456
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.6628695 0.6602475 50.985 < 2e-16 ***
trend 0.0059092 0.0005976 9.889 < 2e-16 ***
fourier(daily_train, K = 3)S1-7 13.3219050 0.4665453 28.554 < 2e-16 ***
fourier(daily_train, K = 3)C1-7 6.3226551 0.4668196 13.544 < 2e-16 ***
fourier(daily_train, K = 3)S2-7 2.9797158 0.4666483 6.385 2.14e-10 ***
fourier(daily_train, K = 3)C2-7 -5.0435602 0.4667159 -10.806 < 2e-16 ***
fourier(daily_train, K = 3)S3-7 -1.7778566 0.4667310 -3.809 0.000144 ***
fourier(daily_train, K = 3)C3-7 0.3581564 0.4666335 0.768 0.442860
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.43 on 1905 degrees of freedom
Multiple R-squared: 0.4001, Adjusted R-squared: 0.3979
F-statistic: 181.5 on 7 and 1905 DF, p-value: < 2.2e-16
checkresiduals(fit.tslmfouriser1) # identical diagnostics to fit.tslm1 (the models are equivalent)
Breusch-Godfrey test for serial correlation of order up to 14
data: Residuals from Linear regression model
LM test = 820.75, df = 14, p-value < 2.2e-16
CV(fit.tslmfouriser1)
CV AIC AICc BIC AdjR2
2.091685e+02 1.022361e+04 1.022371e+04 1.027362e+04 3.979282e-01
# Automatic ETS selection on the raw series -> ETS(M,A,M), AICc 23422.99
fit.ets0 = ets(daily_train) # MAM, 23422.99
fit.ets0
ETS(M,A,M)
Call:
ets(y = daily_train)
Smoothing parameters:
alpha = 0.1955
beta = 3e-04
gamma = 2e-04
Initial states:
l = 13.2435
b = 0.3654
s = 1.016 0.8125 0.7568 0.7335 0.8313 1.3992
1.4507
sigma: 0.281
AIC AICc BIC
23422.82 23422.99 23489.50
# ETS with automatic Box-Cox (lambda ~ 0.204) -> ETS(A,N,A), AICc 12437.65
fit.ets1 = ets(daily_train, lambda = 'auto') # (A,N,A) 12437.65
fit.ets1
ETS(A,N,A)
Call:
ets(y = daily_train, lambda = "auto")
Box-Cox transformation: lambda= 0.204
Smoothing parameters:
alpha = 0.2183
gamma = 1e-04
Initial states:
l = 3.6266
s = 0.1583 -0.4108 -0.5219 -0.5823 -0.381 0.8425
0.8952
sigma: 0.5884
AIC AICc BIC
12437.53 12437.65 12493.10
# ETS on the log scale (lambda = 0) -> ETS(A,N,A), AICc 9747.52: best ETS candidate
fit.ets2 = ets(daily_train, lambda = 0) # (A,N,A) 9747.518
fit.ets2
ETS(A,N,A)
Call:
ets(y = daily_train, lambda = 0)
Box-Cox transformation: lambda= 0
Smoothing parameters:
alpha = 0.2092
gamma = 1e-04
Initial states:
l = 2.5511
s = 0.0854 -0.1963 -0.2504 -0.2819 -0.1841 0.4
0.4274
sigma: 0.2913
AIC AICc BIC
9747.402 9747.518 9802.966
# Damped-trend variant on the log scale; AICc 9767.98 is worse than the undamped fit.ets2
fit.ets3 = ets(daily_train, lambda = 0, damped = TRUE) # (A,Ad,A), 9767.98
fit.ets3
ETS(A,Ad,A)
Call:
ets(y = daily_train, damped = TRUE, lambda = 0)
Box-Cox transformation: lambda= 0
Smoothing parameters:
alpha = 0.2313
beta = 0.0021
gamma = 1e-04
phi = 0.98
Initial states:
l = 2.3039
b = 0.0324
s = 0.0829 -0.1921 -0.2555 -0.2696 -0.1934 0.3976
0.4301
sigma: 0.2926
AIC AICc BIC
9767.784 9767.976 9840.018
# fit.ets3 = ets(diff(daily_train,lag=7), lambda = 0)
# fit.ets3# no model to be fitted in
# Holt-Winters with default (additive) seasonality; AICc 23774.33
fit.hw0 = hw(daily_train,h = 28) # "additive" 23774
fit.hw0$model
Holt-Winters' additive method
Call:
hw(y = daily_train, h = 28)
Smoothing parameters:
alpha = 0.2107
beta = 1e-04
gamma = 1e-04
Initial states:
l = 8.3114
b = 0.0128
s = 1.706 -7.5146 -8.9321 -10.8604 -7.2023 15.9125
16.891
sigma: 11.3844
AIC AICc BIC
23774.16 23774.33 23840.84
# Explicit additive seasonality - identical fit to fit.hw0 (AICc 23774.33)
fit.hw1 = hw(daily_train, seasonal = "additive",h = 28) # 23774.33
fit.hw1$model
Holt-Winters' additive method
Call:
hw(y = daily_train, h = 28, seasonal = "additive")
Smoothing parameters:
alpha = 0.2107
beta = 1e-04
gamma = 1e-04
Initial states:
l = 8.3114
b = 0.0128
s = 1.706 -7.5146 -8.9321 -10.8604 -7.2023 15.9125
16.891
sigma: 11.3844
AIC AICc BIC
23774.16 23774.33 23840.84
# Multiplicative seasonality; AICc 23462.01
fit.hw2 = hw(daily_train, seasonal = "mult", h = 28) # 23462.01
fit.hw2$model
Holt-Winters' multiplicative method
Call:
hw(y = daily_train, h = 28, seasonal = "mult")
Smoothing parameters:
alpha = 0.134
beta = 1e-04
gamma = 1e-04
Initial states:
l = 13.8376
b = 0.1209
s = 1.0418 0.8169 0.7615 0.7387 0.8279 1.4021
1.4112
sigma: 0.2847
AIC AICc BIC
23461.85 23462.01 23528.53
# Damped multiplicative; AICc 23564.84, worse than the undamped fit.hw2
fit.hw3 = hw(daily_train, seasonal = "mult", damped = T, h = 28) # 23564.84
fit.hw3$model
Damped Holt-Winters' multiplicative method
Call:
hw(y = daily_train, h = 28, seasonal = "mult", damped = T)
Smoothing parameters:
alpha = 0.096
beta = 1e-04
gamma = 0.0377
phi = 0.98
Initial states:
l = 12.6478
b = 0.2251
s = 1.1251 0.8357 0.7925 0.7678 0.7296 1.3731
1.3762
sigma: 0.2954
AIC AICc BIC
23564.65 23564.84 23636.89
# Additive seasonality with automatic Box-Cox; AICc 12442.36
fit.hw4 = hw(daily_train, seasonal = "additive", h = 28, lambda = 'auto') # 12442.36
fit.hw4$model
Holt-Winters' additive method
Call:
hw(y = daily_train, h = 28, seasonal = "additive", lambda = "auto")
Box-Cox transformation: lambda= 0.204
Smoothing parameters:
alpha = 0.2254
beta = 1e-04
gamma = 1e-04
Initial states:
l = 3.3738
b = 8e-04
s = 0.1533 -0.4186 -0.5203 -0.5936 -0.3868 0.8573
0.9087
sigma: 0.5888
AIC AICc BIC
12442.19 12442.36 12508.87
# Additive seasonality on the log scale; AICc 9759.41 - best Holt-Winters candidate
fit.hw5 = hw(daily_train, seasonal = "additive", h = 28, lambda = 0 ) # 9759.407 AAA
fit.hw5$model
Holt-Winters' additive method
Call:
hw(y = daily_train, h = 28, seasonal = "additive", lambda = 0)
Box-Cox transformation: lambda= 0
Smoothing parameters:
alpha = 0.2053
beta = 1e-04
gamma = 0.0025
Initial states:
l = 2.5897
b = 0.0017
s = 0.0841 -0.2075 -0.2421 -0.2643 -0.19 0.3788
0.441
sigma: 0.292
AIC AICc BIC
9759.243 9759.407 9825.920
# fit.hw6 = hw(daily_train, seasonal = "mult", h = 28, lambda = 0 ) # forbidden combination of mult and lambda
# fit.hw6$model
# Forecast plot for the best ETS model with its in-sample fitted values overlaid.
fit.ets2 %>% forecast(h=28) %>%
autoplot() +
ylab("Unit sales") +
autolayer(fitted(fit.ets2), series = 'ETS(A, N, A)')
# Zoom in on the forecast horizon (weeks 275-280); the xlim message below is expected.
fit.ets2 %>% forecast(h=28) %>%
autoplot() +
ylab("Unit sales") +
xlim(275, 280)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
# Compare ETS and additive-log HW forecasts against the test observations.
fit.ets2 %>% forecast(h=28) %>%
autoplot(PI = F, series = 'ETS(A, N, A)') +
ylab("Unit sales") +
xlim(275, 279) +
autolayer(daily_test) +
autolayer(fit.hw5, series = "additive + log", PI = F)
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
# Training-period observations vs the additive-log HW model's fitted values and forecast.
autoplot(daily_train, series = 'observations') +
autolayer(fit.hw5, PI = F) +
autolayer(fitted(fit.hw5),series = "additive + log")+
ggtitle('Observations and predictions from additive HW model with log transformation')
# Test-period forecast from the additive Holt-Winters model (log scale).
# Typos fixed: "Predited" -> "Predicted"; closing parenthesis added to the title.
autoplot(daily_test, series = 'Observations') +
  autolayer(fit.hw5, PI = FALSE, series = 'Additive HW model') +
  ylab("Predicted unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales (additive HW model with log transformation)")
checkresiduals(fit.hw5) # Ljung-Box p < 2.2e-16: residuals still show autocorrelation
Ljung-Box test
data: Residuals from Holt-Winters' additive method
Q* = 84.376, df = 3, p-value < 2.2e-16
Model df: 11. Total lags used: 14
accuracy(fit.hw5, daily_test) # additive + log; test RMSE 9.69, MAPE 21.1
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.862237 11.370557 8.413686 -4.984152 23.81488 0.7249112 0.2060711 NA
Test set 2.234848 9.688775 7.214511 -1.433060 21.13814 0.6215919 0.1248852 0.5210131
summary(ur.kpss(daily_train)) # not stationary (same underlying data as foods_train, hence the same statistic 3.0088)
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 8 lags.
Value of test-statistic is: 3.0088
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
summary(ur.kpss(diff(daily_train))) # stationary after one first difference
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 8 lags.
Value of test-statistic is: 0.003
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
summary(ur.kpss(diff(daily_train, lag=7))) # stationary after one seasonal (lag-7) difference
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 8 lags.
Value of test-statistic is: 0.0233
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
lambda = BoxCox.lambda(daily_train) # ~0.204 (recomputed; not passed to the Arima fits below)
autoplot(diff(daily_train, lag=7)) # seasonally differenced data
daily_train %>% ggtsdisplay()
daily_train %>% diff(lag=7) %>% ggtsdisplay()
# First seasonal ARIMA candidate with a lag-7 seasonal difference: residual diagnostics, then the fit.
daily_train %>%
Arima(c(0,0,1), seasonal = c(0,1,1)) %>%
residuals() %>% ggtsdisplay()
(daily_train %>%
Arima(c(0,0,1), seasonal = c(0,1,1))) # AICc 14786.4
Series: .
ARIMA(0,0,1)(0,1,1)[7]
Coefficients:
ma1 sma1
0.3360 -0.7807
s.e. 0.0191 0.0161
sigma^2 estimated as 136.2: log likelihood=-7390.19
AIC=14786.38 AICc=14786.4 BIC=14803.04
(daily_train %>%
Arima(c(1,0,1), seasonal = c(0,1,1))) # AICc 14663.87 - adding AR(1) improves the fit
Series: .
ARIMA(1,0,1)(0,1,1)[7]
Coefficients:
ar1 ma1 sma1
0.7673 -0.4165 -0.8652
s.e. 0.0512 0.0736 0.0200
sigma^2 estimated as 127.5: log likelihood=-7327.92
AIC=14663.84 AICc=14663.87 BIC=14686.06
(daily_train %>%
Arima(c(2,0,1), seasonal = c(0,1,1))) # AICc 14577.05 - AR(2) improves further
Series: .
ARIMA(2,0,1)(0,1,1)[7]
Coefficients:
ar1 ar2 ma1 sma1
1.2973 -0.3042 -0.9107 -0.9869
s.e. 0.0279 0.0265 0.0141 0.0103
sigma^2 estimated as 120.9: log likelihood=-7283.51
AIC=14577.02 AICc=14577.05 BIC=14604.78
(daily_train %>%
Arima(c(0,0,1), seasonal = c(0,1,2))) # AICc 14786.22
Series: .
ARIMA(0,0,1)(0,1,2)[7]
Coefficients:
ma1 sma1 sma2
0.3357 -0.7555 -0.0351
s.e. 0.0190 0.0231 0.0238
sigma^2 estimated as 136.1: log likelihood=-7389.1
AIC=14786.2 AICc=14786.22 BIC=14808.41
(daily_train %>%
Arima(c(1,0,1), seasonal = c(0,1,2))) # AICc 14662.2
Series: .
ARIMA(1,0,1)(0,1,2)[7]
Coefficients:
ar1 ma1 sma1 sma2
0.7720 -0.4165 -0.8365 -0.0510
s.e. 0.0515 0.0722 0.0260 0.0275
sigma^2 estimated as 127.2: log likelihood=-7326.08
AIC=14662.17 AICc=14662.2 BIC=14689.93
(daily_train %>%
Arima(c(2,0,1), seasonal = c(0,1,2))) # AICc 14568.07
Series: .
ARIMA(2,0,1)(0,1,2)[7]
Coefficients:
ar1 ar2 ma1 sma1 sma2
1.3082 -0.3138 -0.9208 -0.9170 -0.0765
s.e. 0.0271 0.0259 0.0132 0.0237 0.0231
sigma^2 estimated as 120: log likelihood=-7278.01
AIC=14568.03 AICc=14568.07 BIC=14601.34
(daily_train %>%
Arima(c(2,0,1), seasonal = c(1,1,2))) # AICc 14556.24 - lowest AICc of all candidates *****
Series: .
ARIMA(2,0,1)(1,1,2)[7]
Coefficients:
ar1 ar2 ma1 sar1 sma1 sma2
1.3023 -0.3105 -0.9186 0.8309 -1.7596 0.7596
s.e. 0.0293 0.0273 0.0163 0.0518 0.0591 0.0591
sigma^2 estimated as 118.8: log likelihood=-7271.09
AIC=14556.18 AICc=14556.24 BIC=14595.05
(daily_train %>%
Arima(c(2,0,1), seasonal = c(2,1,2))) # AICc 14559.84 - the extra SAR term is not worth it
Series: .
ARIMA(2,0,1)(2,1,2)[7]
Coefficients:
ar1 ar2 ma1 sar1 sar2 sma1 sma2
1.3077 -0.3146 -0.9214 0.7952 -0.0088 -1.7277 0.7285
s.e. 0.0416 0.0379 0.0197 0.0562 0.0265 0.0537 0.0524
sigma^2 estimated as 119.2: log likelihood=-7271.88
AIC=14559.76 AICc=14559.84 BIC=14604.18
# Residual diagnostics and final fit of the chosen model, ARIMA(2,0,1)(1,1,2)[7].
daily_train %>%
Arima(c(2,0,1), seasonal = c(1,1,2)) %>%
residuals() %>% ggtsdisplay()
(fit.arima1 <- daily_train %>%
Arima(c(2,0,1), seasonal = c(1,1,2)))
Series: .
ARIMA(2,0,1)(1,1,2)[7]
Coefficients:
ar1 ar2 ma1 sar1 sma1 sma2
1.3023 -0.3105 -0.9186 0.8309 -1.7596 0.7596
s.e. 0.0293 0.0273 0.0163 0.0518 0.0591 0.0591
sigma^2 estimated as 118.8: log likelihood=-7271.09
AIC=14556.18 AICc=14556.24 BIC=14595.05
checkresiduals(fit.arima1) # Ljung-Box p = 0.10 > 0.05: residuals are consistent with white noise
Ljung-Box test
data: Residuals from ARIMA(2,0,1)(1,1,2)[7]
Q* = 13.343, df = 8, p-value = 0.1006
Model df: 6. Total lags used: 14
# Test-period forecast from the manually selected ARIMA model.
autoplot(daily_test, series = 'Observations') +
autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) +
ggtitle('Predicted daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
ylab('Unit sales') +
xlab('Week')
# Same forecast with the in-sample fitted values overlaid.
autoplot(daily_test, series = 'Observations') +
autolayer(fitted(fit.arima1)) +
autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI=F) +
ggtitle('Daily unit sales (ARIMA(2,0,1)(1,1,2)[7])') +
ylab('Unit sales') +
xlab('Week')
# NOTE(review): the name suggests a log transform, but no lambda argument is passed -
# auto.arima fits the raw series here; confirm whether lambda = 0 was intended.
fit.arimalog = auto.arima(daily_train)
checkresiduals(fit.arimalog) # Ljung-Box p = 2.6e-08: the auto-selected model leaves residual autocorrelation
Ljung-Box test
data: Residuals from ARIMA(1,0,1)(0,1,2)[7] with drift
Q* = 53.291, df = 9, p-value = 2.574e-08
Model df: 5. Total lags used: 14
# Test-period forecast from the auto.arima model.
autoplot(daily_test, series = 'Observations') +
autolayer(forecast(fit.arimalog, h=28), PI=F, series = 'Auto arima') +
xlab('Week') +
ylab('Unit sales') +
ggtitle('Predicted daily unit sales')
# Dynamic harmonic regression: compare K = 1..3 Fourier pairs with non-seasonal ARIMA
# errors on the log scale; each panel's x-label reports the AICc for that K.
plots <- list()
for (i in seq(3)) {
fit <- auto.arima(daily_train, xreg = fourier(daily_train, K = i),
seasonal = FALSE, lambda = 0)
plots[[i]] <- autoplot(forecast(fit,
xreg=fourier(daily_train, K=i, h=28))) +
xlim(250,280) +
xlab(paste("K=",i," AICC=",round(fit[["aicc"]],2))) +
ylab("unit sales")
}
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
Scale for 'x' is already present. Adding another scale for 'x', which will replace the existing scale.
gridExtra::grid.arrange(
plots[[1]],plots[[2]],plots[[3]],nrow=3)
# NOTE(review): fit.dynamic3 is only defined a few lines below - this call worked in the
# interactive session this transcript records, but would fail if the script were sourced top-to-bottom.
checkresiduals(fit.dynamic3) # Ljung-Box p = 0.38 > 0.05: residuals look like white noise
Ljung-Box test
data: Residuals from Regression with ARIMA(2,1,1) errors
Q* = 5.3023, df = 5, p-value = 0.3801
Model df: 9. Total lags used: 14
# Final dynamic regression: K = 3 Fourier pairs, non-seasonal ARIMA(2,1,1) errors, log scale.
(fit.dynamic3 <- auto.arima(daily_train, xreg = fourier(daily_train, K = 3),
seasonal = FALSE, lambda = 0))
Series: daily_train
Regression with ARIMA(2,1,1) errors
Box Cox transformation: lambda= 0
Coefficients:
ar1 ar2 ma1 S1-7 C1-7 S2-7 C2-7 S3-7 C3-7
0.2573 0.0446 -0.9155 0.3332 0.1755 0.0702 -0.1117 -0.0405 0.0213
s.e. 0.0265 0.0256 0.0130 0.0100 0.0100 0.0079 0.0079 0.0073 0.0073
sigma^2 estimated as 0.08135: log likelihood=-310.5
AIC=640.99 AICc=641.11 BIC=696.55
# Test-period forecast from the dynamic regression model.
# Bug fix: the future regressors must use K = 3 (the K the model was fitted with),
# not the leftover loop variable `i` - this only ran correctly because i happened
# to end the loop at 3.
autoplot(daily_test, series = 'Observations') +
  autolayer(forecast(fit.dynamic3,
                     xreg = fourier(daily_train, K = 3, h = 28)), PI = FALSE, series = 'Dynamic regression') +
  ylab("unit sales") +
  xlab("Week") +
  ggtitle('Predicted unit sales (dynamic regression with ARIMA(2,1,1) errors (K=3))')
NA
print("Linear regression")
[1] "Linear regression"
# Bug fix: the test data frame is named food_test_df - `food_test` is never defined.
accuracy(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2)), daily_test)
ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set -0.9138804 9.60387 7.269412 -11.12887 23.62771 -0.3169598 0.5655129
print("Additive Holt-Winters' model with log transformation") # typo fixed: "Additice" -> "Additive"
[1] "Additive Holt-Winters' model with log transformation"
accuracy(fit.hw5, daily_test)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.862237 11.370557 8.413686 -4.984152 23.81488 0.7249112 0.2060711 NA
Test set 2.234848 9.688775 7.214511 -1.433060 21.13814 0.6215919 0.1248852 0.5210131
print("Manually optimized ARIMA(2,0,1)(1,1,2)[7]") # typo fixed: "optimozed" -> "optimized"
[1] "Manually optimized ARIMA(2,0,1)(1,1,2)[7]"
accuracy(forecast(fit.arima1, h=28), daily_test)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 0.511315 10.864516 8.185594 -6.383054 23.68489 0.7052591 -0.004813012 NA
Test set 1.661816 9.539802 7.227151 -3.130000 21.54173 0.6226810 0.133261719 0.5247036
print("Dynamic regression with ARIMA(2,1,1) errors (K=3)")
[1] "Dynamic regression with ARIMA(2,1,1) errors (K=3)"
accuracy(forecast(fit.dynamic3,
xreg=fourier(daily_train, K=3, h=28)), daily_test) # all four models have similar test RMSE (~9.5-9.7)
ME RMSE MAE MPE MAPE MASE ACF1 Theil's U
Training set 1.270446 10.903569 8.168515 -3.81455307 23.05112 0.7037876 0.06327517 NA
Test set 2.725233 9.698695 7.226867 -0.02355306 21.07680 0.6226566 0.12913990 0.5179121
# Overlay all four models' test-period forecasts against the observations.
# Bug fixes: food_test -> food_test_df (food_test is undefined); K = i -> K = 3
# (use the K the dynamic regression was fitted with, not the leftover loop variable);
# "Predited" -> "Predicted".
autoplot(daily_test, series = 'Observations') +
  autolayer(ts(predict.lm(fit.lm1, food_test_df), frequency = 7, start = c(275,2) ), series = 'Linear regression') +
  autolayer(fit.hw5, PI = FALSE, series = 'Holt-Winters model (log)') +
  autolayer(forecast(fit.arima1, h=28), series = 'ARIMA', PI = FALSE) +
  autolayer(forecast(fit.dynamic3, xreg = fourier(daily_train, K = 3, h = 28)), PI = FALSE, series = 'Dynamic regression') +
  ylab("Predicted unit sales") +
  xlab("Week") +
  ggtitle("Predicted daily unit sales")
NA